
source('make_dataset.R')

# Open the full 2012 / 2016 / 2020 parquet files used by the summaries below.
# open_dataset() is not defined in this file (presumably arrow's lazy reader,
# since the queries built on these objects are materialised with collect()
# further down -- confirm).
df12r = open_dataset('df12_full.parquet')
df16r = open_dataset('df16_full.parquet')
df20r = open_dataset('df20_full.parquet')

# A9 ----------------------------------------------------------------------

# make income data

# Per-cycle summaries by L2 income category (2020, 2016, 2012). Records with
# a missing income_est are dropped; for every remaining category the tables
# hold sum(d_ind) / n and sum(r_ind) / n (the D / R donor shares, assuming
# d_ind and r_ind are 0/1 flags) plus the category size. Each table ends up
# with the columns
#   income_est, <D share>, <R share>, N20, pctile, cycle
# in that order. The size column keeps the name N20 in every cycle. pctile is
# the cumulative share of records in all lower categories, i.e. the lower
# percentile bound of the category (0 for the lowest one).
#
# pctile is lagged with data.table's shift() instead of lag(): lag() only
# shifts when dplyr's version masks stats::lag (which leaves the values of a
# plain vector in place), and the previous `[2:.N]` indexing failed on a
# one-row table.

# 2020
s1 = df20r %>%
  filter(!is.na(income_est)) %>%
  group_by(income_est) %>%
  summarise(
    biden = sum(d_ind) / n(),
    trump20 = sum(r_ind) / n(),
    N20 = n()
  ) %>%
  collect() %>%
  arrange(income_est)

s1 = data.table(s1)
s1[, pctile := shift(cumsum(N20) / sum(N20), fill = 0)]
s1[, cycle := 2020]

# 2016
s2 = df16r %>%
  filter(!is.na(income_est)) %>%
  group_by(income_est) %>%
  summarise(
    clinton = sum(d_ind) / n(),
    trump16 = sum(r_ind) / n(),
    N20 = n()
  ) %>%
  collect() %>%
  arrange(income_est)

s2 = data.table(s2)
s2[, pctile := shift(cumsum(N20) / sum(N20), fill = 0)]
s2[, cycle := 2016]

# 2012
s3 = df12r %>%
  filter(!is.na(income_est)) %>%
  group_by(income_est) %>%
  summarise(
    obama12 = sum(d_ind) / n(),
    romney = sum(r_ind) / n(),
    N20 = n()
  ) %>%
  collect() %>%
  arrange(income_est)

s3 = data.table(s3)
s3[, pctile := shift(cumsum(N20) / sum(N20), fill = 0)]
s3[, cycle := 2012]

# Stack the three per-cycle income tables into one long table on a common
# percentile scale.

# Columns are picked by name so a change in column order upstream cannot
# silently select the wrong ones. The D / R columns are named differently in
# each cycle, hence binding by position.
out_summ_i = rbindlist(
  list(
    s1[, .(cycle, biden, trump20)],
    s2[, .(cycle, clinton, trump16)],
    s3[, .(cycle, obama12, romney)]
  ),
  use.names = FALSE
)

# The row-wise average below pairs categories by position, so every cycle
# must contain the same number of income categories; otherwise cbind() would
# recycle the shorter vectors.
stopifnot(
  nrow(s1) == nrow(s2),
  nrow(s2) == nrow(s3)
)

# Average each category's lower percentile bound over the three cycles.
# INCOME_PCTILES is reused further down when building the A10 tables.
INCOME_PCTILES = rowMeans(cbind(s1$pctile, s2$pctile, s3$pctile))

out_summ_i[, pctile := rep(INCOME_PCTILES, 3)]

setnames(out_summ_i, c('cycle', 'd', 'r', 'pctile'))

# Long format: one row per cycle x percentile x party.
out_summ_i = melt(out_summ_i, id.vars = c('cycle', 'pctile'))

# Series label = party letter followed by the cycle, e.g. 'd2020'.
out_summ_i[, variable := str_c(variable, cycle)]

# Re-bin the in-memory tables df20 / df16 / df12 on the income-category
# percentile bounds and recompute the D / R shares per bin.
# df20, df16, df12 and make_bins() are not defined in this file (presumably
# they come from make_dataset.R -- confirm). `total` is presumably the NWR
# measure, given the 'NWR' label this table receives below -- confirm.

# Break points: the averaged lower bounds plus the upper end, 1.
coll = c(unique(out_summ_i$pctile), 1)

# nat_dec is added to the three tables by reference.
df20[, nat_dec := make_bins(total, coll)]
df16[, nat_dec := make_bins(total, coll)]
df12[, nat_dec := make_bins(total, coll)]

s1d = df20[, .(d2020 = sum(d_ind) / .N, r2020 = sum(r_ind) / .N, N20 = .N),
           by = .(nat_dec)][order(nat_dec)]
s2d = df16[, .(d2016 = sum(d_ind) / .N, r2016 = sum(r_ind) / .N, N16 = .N),
           by = .(nat_dec)][order(nat_dec)]
# NOTE: the 2012 columns are r-then-d, unlike the other cycles. The order is
# kept because it determines the level order of `variable` after melt().
s3d = df12[, .(r2012 = sum(r_ind) / .N, d2012 = sum(d_ind) / .N, N12 = .N),
           by = .(nat_dec)][order(nat_dec)]

# cbind() pairs rows by position, so all three cycles must have produced the
# same bins in the same order.
stopifnot(
  identical(as.character(s1d$nat_dec), as.character(s2d$nat_dec)),
  identical(as.character(s1d$nat_dec), as.character(s3d$nat_dec))
)

# One wide table: bin, then the D / R shares of each cycle. (The former
# single-element rbindlist() wrapper was a no-op and has been dropped.)
out_summ = cbind(s1d[, 1:3], s2d[, 2:3], s3d[, 2:3])
# The bin labels are parsed as numbers, so make_bins() is expected to label
# bins with a numeric bound -- confirm against its definition.
out_summ[, nat_dec := as.numeric(as.character(nat_dec))]
out_summ = melt(out_summ, id.vars = 1)

# Bring both income summaries onto a 0-100 percentile scale with identical
# columns, tag each with its measure, and stack them into income_out.

# Category-based series: the cycle is already part of `variable`.
out_summ_i[, cycle := NULL]
out_summ_i[, pctile := round(100 * pctile)]
out_summ_i[, measure := 'Income (L2)']

# Re-binned series.
setnames(out_summ, 'nat_dec', 'pctile')
out_summ[, pctile := round(100 * pctile)]
out_summ[, measure := 'NWR']

income_out = rbindlist(list(out_summ, out_summ_i))

# Upper-cased party letter plus the measure, e.g. 'D NWR'.
income_out[
  ,
  party_measure := str_c(toupper(substr(variable, 1, 1)), ' ', measure)
]

# make net worth data

# Per-cycle summaries by L2 net-worth category (2020, 2016, 2012). Records
# with a missing net_worth are dropped; for every remaining category the
# tables hold sum(d_ind) / n and sum(r_ind) / n (the D / R donor shares,
# assuming d_ind and r_ind are 0/1 flags) plus the category size. Each table
# ends up with the columns
#   net_worth, <D share>, <R share>, N20, pctile, cycle
# in that order. The size column keeps the name N20 in every cycle. pctile is
# the cumulative share of records in all lower categories, i.e. the lower
# percentile bound of the category (0 for the lowest one).
#
# pctile is lagged with data.table's shift() instead of lag(): lag() only
# shifts when dplyr's version masks stats::lag (which leaves the values of a
# plain vector in place), and the previous `[2:.N]` indexing failed on a
# one-row table.

# 2020
s1 = df20r %>%
  filter(!is.na(net_worth)) %>%
  group_by(net_worth) %>%
  summarise(
    biden = sum(d_ind) / n(),
    trump20 = sum(r_ind) / n(),
    N20 = n()
  ) %>%
  collect() %>%
  arrange(net_worth)

s1 = data.table(s1)
s1[, pctile := shift(cumsum(N20) / sum(N20), fill = 0)]
s1[, cycle := 2020]

# 2016
s2 = df16r %>%
  filter(!is.na(net_worth)) %>%
  group_by(net_worth) %>%
  summarise(
    clinton = sum(d_ind) / n(),
    trump16 = sum(r_ind) / n(),
    N20 = n()
  ) %>%
  collect() %>%
  arrange(net_worth)

s2 = data.table(s2)
s2[, pctile := shift(cumsum(N20) / sum(N20), fill = 0)]
s2[, cycle := 2016]

# 2012
s3 = df12r %>%
  filter(!is.na(net_worth)) %>%
  group_by(net_worth) %>%
  summarise(
    obama12 = sum(d_ind) / n(),
    romney = sum(r_ind) / n(),
    N20 = n()
  ) %>%
  collect() %>%
  arrange(net_worth)

s3 = data.table(s3)
s3[, pctile := shift(cumsum(N20) / sum(N20), fill = 0)]
s3[, cycle := 2012]

# Stack the three per-cycle net-worth tables into one long table on a common
# percentile scale.

# Columns are picked by name so a change in column order upstream cannot
# silently select the wrong ones. The D / R columns are named differently in
# each cycle, hence binding by position.
out_summ_w = rbindlist(
  list(
    s1[, .(cycle, biden, trump20)],
    s2[, .(cycle, clinton, trump16)],
    s3[, .(cycle, obama12, romney)]
  ),
  use.names = FALSE
)

# The row-wise average below pairs categories by position, so every cycle
# must contain the same number of net-worth categories; otherwise cbind()
# would recycle the shorter vectors.
stopifnot(
  nrow(s1) == nrow(s2),
  nrow(s2) == nrow(s3)
)

# Average each category's lower percentile bound over the three cycles.
# NET_WORTH_PCTILES is reused further down when building the A10 tables.
NET_WORTH_PCTILES = rowMeans(cbind(s1$pctile, s2$pctile, s3$pctile))

out_summ_w[, pctile := rep(NET_WORTH_PCTILES, 3)]

setnames(out_summ_w, c('cycle', 'd', 'r', 'pctile'))

# Long format: one row per cycle x percentile x party.
out_summ_w = melt(out_summ_w, id.vars = c('cycle', 'pctile'))

# Series label = party letter followed by the cycle, e.g. 'd2020'.
out_summ_w[, variable := str_c(variable, cycle)]

# Re-bin the in-memory tables df20 / df16 / df12 on the net-worth-category
# percentile bounds and recompute the D / R shares per bin.
# df20, df16, df12 and make_bins() are not defined in this file (presumably
# they come from make_dataset.R -- confirm). `total` is presumably the NWR
# measure, given the 'NWR' label this table receives below -- confirm.

# Break points: the averaged lower bounds plus the upper end, 1.
coll = c(unique(out_summ_w$pctile), 1)

# nat_dec is (re)written in the three tables by reference.
df20[, nat_dec := make_bins(total, coll)]
df16[, nat_dec := make_bins(total, coll)]
df12[, nat_dec := make_bins(total, coll)]

s1d = df20[, .(d2020 = sum(d_ind) / .N, r2020 = sum(r_ind) / .N, N20 = .N),
           by = .(nat_dec)][order(nat_dec)]
s2d = df16[, .(d2016 = sum(d_ind) / .N, r2016 = sum(r_ind) / .N, N16 = .N),
           by = .(nat_dec)][order(nat_dec)]
# NOTE: the 2012 columns are r-then-d, unlike the other cycles. The order is
# kept because it determines the level order of `variable` after melt().
s3d = df12[, .(r2012 = sum(r_ind) / .N, d2012 = sum(d_ind) / .N, N12 = .N),
           by = .(nat_dec)][order(nat_dec)]

# cbind() pairs rows by position, so all three cycles must have produced the
# same bins in the same order.
stopifnot(
  identical(as.character(s1d$nat_dec), as.character(s2d$nat_dec)),
  identical(as.character(s1d$nat_dec), as.character(s3d$nat_dec))
)

# One wide table: bin, then the D / R shares of each cycle. (The former
# single-element rbindlist() wrapper was a no-op and has been dropped.)
out_summ2 = cbind(s1d[, 1:3], s2d[, 2:3], s3d[, 2:3])
# The bin labels are parsed as numbers, so make_bins() is expected to label
# bins with a numeric bound -- confirm against its definition.
out_summ2[, nat_dec := as.numeric(as.character(nat_dec))]
out_summ2 = melt(out_summ2, id.vars = 1)

# Bring both net-worth summaries onto a 0-100 percentile scale with identical
# columns, tag each with its measure, stack them into wealth_out, and write
# the figure A9 data to disk.
# (A leftover bare `out_summ_w` statement, which only auto-printed the table
# when run interactively, has been removed.)

# Category-based series: the cycle is already part of `variable`.
out_summ_w[, cycle := NULL]
out_summ_w[, pctile := round(pctile * 100, 0)]

# Re-binned series.
out_summ2[, nat_dec := round(nat_dec * 100, 0)]
setnames(out_summ2, 'nat_dec', 'pctile')

out_summ_w[, measure := 'Net Worth (L2)']
out_summ2[, measure := 'NWR']

wealth_out = rbindlist(list(out_summ2, out_summ_w))

# Upper-cased party letter plus the measure, e.g. 'D NWR'.
wealth_out[
  ,
  party_measure := toupper(substr(variable, 1, 1)) %>% str_c(' ', measure)
]

# income_out is built earlier in this script; the target directory must
# already exist.
save(income_out, wealth_out, file = 'summary_data/figA9.rda')


# A10 ---------------------------------------------------------------------

# make income data

# Repeat giving by L2 income category: among 2012 donors to a party, the
# share whose `component` id also appears in that party's 2016 (or 2020)
# donor list. `donors` is not defined in this file; %chin% requires its
# elements to be character vectors (presumably donor ids per party and
# cycle -- confirm).

# 2012 records with a known income category and a D or R flag set.
df12_full = df12r %>%
  filter(!is.na(income_est)) %>%
  filter(r_ind == 1 | d_ind == 1) %>%
  select(component, income_est) %>%
  as.data.table() %>%
  arrange(income_est)

# Pair each income category (in sorted order) with its percentile bound.
# INCOME_PCTILES has one entry per category of the full data, so pairing by
# position is only valid when this donor subset contains the same number of
# categories; fail loudly instead of recycling or mislabelling.
income_levels = unique(df12_full$income_est)
stopifnot(length(income_levels) == length(INCOME_PCTILES))
keys = data.table(income_est = income_levels, pctile = INCOME_PCTILES)
df12_full = merge(df12_full, keys, by = 'income_est')

# Each table below: subset to the 2012 donors of one party, flag those who
# gave again in the later cycle, average the flag per percentile, then label.

# 2012 Democratic donors who gave again in 2016.
d12 = df12_full[component %chin% donors[['d12_inds']]
  ][, subsq_giver := component %chin% donors[['d16_inds']]
  ][, .(sq = mean(subsq_giver)), by = pctile
  ][, `:=`(class = 'Obama to Clinton', cycle = '2012 to 2016')]

# 2012 Republican donors who gave again in 2016.
r12 = df12_full[component %chin% donors[['r12_inds']]
  ][, subsq_giver := component %chin% donors[['r16_inds']]
  ][, .(sq = mean(subsq_giver)), by = pctile
  ][, `:=`(class = 'Romney to Trump \'16', cycle = '2012 to 2016')]

# 2012 Democratic donors who gave again in 2020 (still based on 2012 rows).
d20 = df12_full[component %chin% donors[['d12_inds']]
  ][, subsq_giver := component %chin% donors[['d20_inds']]
  ][, .(sq = mean(subsq_giver)), by = pctile
  ][, `:=`(class = 'Obama to Biden', cycle = '2012 to 2020')]

# 2012 Republican donors who gave again in 2020.
r20 = df12_full[component %chin% donors[['r12_inds']]
  ][, subsq_giver := component %chin% donors[['r20_inds']]
  ][, .(sq = mean(subsq_giver)), by = pctile
  ][, `:=`(class = 'Romney to Trump \'20', cycle = '2012 to 2020')]

fig6_income = rbindlist(list(d12, r12, d20, r20))
fig6_income[, pctile := round(pctile * 100)]
fig6_income[, measure := 'Income (L2)']

# recut quantiles in original data and compare

# Same repeat-giving shares as fig6_income, but computed on df12 after
# re-binning `total` at the income percentile bounds. df20 / df16 / df12,
# make_bins() and donors are not defined in this file.

# Break points: the income-category lower bounds plus the upper end, 1.
coll = c(INCOME_PCTILES, 1)

# nat_dec is rewritten in all three tables by reference; only df12 is read
# in the rest of this section.
df20[, nat_dec := make_bins(total, coll)]
df16[, nat_dec := make_bins(total, coll)]
df12[, nat_dec := make_bins(total, coll)]

# Each table below: subset to the 2012 donors of one party, flag those whose
# id is also in the later cycle's list, average the flag per bin, then label.

# 2012 Democratic donors -> 2016.
d12 = df12[component %chin% donors[['d12_inds']]
  ][, subsq_giver := component %chin% donors[['d16_inds']]
  ][, .(sq = mean(subsq_giver)), by = nat_dec
  ][, `:=`(class = 'Obama to Clinton', cycle = '2012 to 2016')]

# 2012 Republican donors -> 2016.
r12 = df12[component %chin% donors[['r12_inds']]
  ][, subsq_giver := component %chin% donors[['r16_inds']]
  ][, .(sq = mean(subsq_giver)), by = nat_dec
  ][, `:=`(class = 'Romney to Trump \'16', cycle = '2012 to 2016')]

# 2012 Democratic donors -> 2020.
d20 = df12[component %chin% donors[['d12_inds']]
  ][, subsq_giver := component %chin% donors[['d20_inds']]
  ][, .(sq = mean(subsq_giver)), by = nat_dec
  ][, `:=`(class = 'Obama to Biden', cycle = '2012 to 2020')]

# 2012 Republican donors -> 2020.
r20 = df12[component %chin% donors[['r12_inds']]
  ][, subsq_giver := component %chin% donors[['r20_inds']]
  ][, .(sq = mean(subsq_giver)), by = nat_dec
  ][, `:=`(class = 'Romney to Trump \'20', cycle = '2012 to 2020')]

fig6_income_corr = rbindlist(list(d12, r12, d20, r20))
# Bin labels -> numbers -> whole percentiles, then align the column name with
# fig6_income.
fig6_income_corr[, nat_dec := round(as.numeric(as.character(nat_dec)) * 100)]
setnames(fig6_income_corr, 'nat_dec', 'pctile')
fig6_income_corr[, measure := 'NWR']

# net worth

# Repeat giving by L2 net-worth category: among 2012 donors to a party, the
# share whose `component` id also appears in that party's 2016 (or 2020)
# donor list. `donors` is not defined in this file; %chin% requires its
# elements to be character vectors (presumably donor ids per party and
# cycle -- confirm).

# 2012 records with a known net-worth category and a D or R flag set.
df12_full = df12r %>%
  filter(!is.na(net_worth)) %>%
  filter(r_ind == 1 | d_ind == 1) %>%
  select(component, net_worth) %>%
  as.data.table() %>%
  arrange(net_worth)

# Pair each net-worth category (in sorted order) with its percentile bound.
# NET_WORTH_PCTILES has one entry per category of the full data, so pairing
# by position is only valid when this donor subset contains the same number
# of categories; fail loudly instead of recycling or mislabelling.
net_worth_levels = unique(df12_full$net_worth)
stopifnot(length(net_worth_levels) == length(NET_WORTH_PCTILES))
keys = data.table(net_worth = net_worth_levels, pctile = NET_WORTH_PCTILES)
df12_full = merge(df12_full, keys, by = 'net_worth')

# Each table below: subset to the 2012 donors of one party, flag those who
# gave again in the later cycle, average the flag per percentile, then label.

# 2012 Democratic donors who gave again in 2016.
d12 = df12_full[component %chin% donors[['d12_inds']]
  ][, subsq_giver := component %chin% donors[['d16_inds']]
  ][, .(sq = mean(subsq_giver)), by = pctile
  ][, `:=`(class = 'Obama to Clinton', cycle = '2012 to 2016')]

# 2012 Republican donors who gave again in 2016.
r12 = df12_full[component %chin% donors[['r12_inds']]
  ][, subsq_giver := component %chin% donors[['r16_inds']]
  ][, .(sq = mean(subsq_giver)), by = pctile
  ][, `:=`(class = 'Romney to Trump \'16', cycle = '2012 to 2016')]

# 2012 Democratic donors who gave again in 2020 (still based on 2012 rows).
d20 = df12_full[component %chin% donors[['d12_inds']]
  ][, subsq_giver := component %chin% donors[['d20_inds']]
  ][, .(sq = mean(subsq_giver)), by = pctile
  ][, `:=`(class = 'Obama to Biden', cycle = '2012 to 2020')]

# 2012 Republican donors who gave again in 2020.
r20 = df12_full[component %chin% donors[['r12_inds']]
  ][, subsq_giver := component %chin% donors[['r20_inds']]
  ][, .(sq = mean(subsq_giver)), by = pctile
  ][, `:=`(class = 'Romney to Trump \'20', cycle = '2012 to 2020')]

fig6_nw = rbindlist(list(d12, r12, d20, r20))
fig6_nw[, pctile := round(pctile * 100)]
fig6_nw[, measure := 'Net Worth (L2)']

# recut quantiles in original data and compare

# Same repeat-giving shares as fig6_nw, but computed on df12 after re-binning
# `total` at the net-worth percentile bounds; then write all four A10 tables
# to disk. df20 / df16 / df12, make_bins() and donors are not defined in this
# file.

# Break points: the net-worth-category lower bounds plus the upper end, 1.
coll = c(NET_WORTH_PCTILES, 1)

# nat_dec is rewritten in all three tables by reference; only df12 is read
# in the rest of this section.
df20[, nat_dec := make_bins(total, coll)]
df16[, nat_dec := make_bins(total, coll)]
df12[, nat_dec := make_bins(total, coll)]

# Each table below: subset to the 2012 donors of one party, flag those whose
# id is also in the later cycle's list, average the flag per bin, then label.

# 2012 Democratic donors -> 2016.
d12 = df12[component %chin% donors[['d12_inds']]
  ][, subsq_giver := component %chin% donors[['d16_inds']]
  ][, .(sq = mean(subsq_giver)), by = nat_dec
  ][, `:=`(class = 'Obama to Clinton', cycle = '2012 to 2016')]

# 2012 Republican donors -> 2016.
r12 = df12[component %chin% donors[['r12_inds']]
  ][, subsq_giver := component %chin% donors[['r16_inds']]
  ][, .(sq = mean(subsq_giver)), by = nat_dec
  ][, `:=`(class = 'Romney to Trump \'16', cycle = '2012 to 2016')]

# 2012 Democratic donors -> 2020.
d20 = df12[component %chin% donors[['d12_inds']]
  ][, subsq_giver := component %chin% donors[['d20_inds']]
  ][, .(sq = mean(subsq_giver)), by = nat_dec
  ][, `:=`(class = 'Obama to Biden', cycle = '2012 to 2020')]

# 2012 Republican donors -> 2020.
r20 = df12[component %chin% donors[['r12_inds']]
  ][, subsq_giver := component %chin% donors[['r20_inds']]
  ][, .(sq = mean(subsq_giver)), by = nat_dec
  ][, `:=`(class = 'Romney to Trump \'20', cycle = '2012 to 2020')]

fig6_nw_corr = rbindlist(list(d12, r12, d20, r20))
# Bin labels -> numbers -> whole percentiles, then align the column name with
# fig6_nw.
fig6_nw_corr[, nat_dec := round(as.numeric(as.character(nat_dec)) * 100)]
setnames(fig6_nw_corr, 'nat_dec', 'pctile')
fig6_nw_corr[, measure := 'NWR']

# fig6_income and fig6_income_corr are built earlier in this script; the
# target directory must already exist.
save(
  fig6_income, fig6_income_corr, fig6_nw, fig6_nw_corr,
  file = 'summary_data/figA10.rda'
)
